Exported from analysis/transportation-rail.ipynb committed by GitHub Action on Sat Feb 19 00:54:11 2022 revision 1, 0e9088e
A look at railroad freight volumes, according to reports obtained from BNSF, CSX, Norfolk Southern, and Union Pacific.
import pandas as pd
import altair as alt
import numpy as np
import re
from joblib import Memory
from time import sleep
from datetime import date
from IPython.core.display import HTML
from tabula import read_pdf
from tika import parser
from urllib.request import urlopen, Request
from urllib.parse import urlencode
from urllib.error import URLError
from http.client import InvalidURL
from json import dumps, loads
# need {unidecode, fake_useragent}
from googleapi.google import search as googleSearch
from urllib.error import HTTPError
from io import BytesIO
from os import environ
# Disk-backed joblib cache so re-running the notebook doesn't re-download
# and re-parse every PDF report.
memory = Memory('data/', verbose=0)
# Desktop-browser User-Agent string; several of the investor-relations sites
# reject urllib's default agent.
uaString = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'
# Abort when run inside the CI export workflow: the scraping below needs
# manual supervision (rate limits, ad-hoc PDF layouts), so this notebook is
# not safe to execute unattended.
if environ.get("GITHUB_WORKFLOW"):
    raise RuntimeError("Requires manual updates")
%%capture
%%time
@memory.cache
def parseCsxReport(urlAndWeek):
    """Download and parse one CSX weekly volume-trends PDF.

    Parameters
    ----------
    urlAndWeek : tuple(str, str)
        (pdf url, label) where the label is either '2013 Week 29' style or
        a '...-WK29' file-name fragment.

    Returns
    -------
    pandas.DataFrame indexed by 'Cargo' with all-numeric cells, or None
    when the label or PDF cannot be parsed (the failing input is printed
    for manual follow-up).
    """
    try:
        url, weekAndYear = urlAndWeek
        # e.g. '2013 Week 29'
        year, week = weekAndYear.split(' Week ')
    except ValueError:
        try:
            url, weekAndYear = urlAndWeek
            # e.g. '2013-WK29'
            year, week = weekAndYear.split('-WK')
        except ValueError:
            print(urlAndWeek)
            return None
    # Column layout mirrors the PDF: WEEK n | QUARTER TO DATE | YEAR TO DATE,
    # each section carrying current year, previous year, and percent change.
    baseCols = ['Cargo', 'Week {0} {1}', 'Week {0} {2}', 'Week {0} pct-change',
                'Week {0} {1} QTD', 'Week {0} {2} QTD', 'Week {0} QTD pct-change',
                'Week {0} {1} YTD', 'Week {0} {2} YTD', 'Week {0} YTD pct-change']
    try:
        cols = [c.format(week.split(' ')[0], int(year), int(year) - 1) for c in baseCols]
    except ValueError:
        print(urlAndWeek)
        return None
    try:
        tmp = read_pdf(url, pages='all', pandas_options={ 'names': cols })
        df = pd.concat(tmp) if isinstance(tmp, list) else tmp
        # Drop the header row, then strip thousands separators / footnote
        # characters so every cell becomes a float.
        return df.iloc[1:, :].set_index('Cargo').applymap(
            lambda v: float(re.sub(r'[^0-9.]', '', v)))
    except (AttributeError, ValueError):
        # AttributeError: non-string cells; ValueError: unparseable table.
        # The two handlers were previously duplicated verbatim.
        print(urlAndWeek)
        return None
#parseReport(('http://csx.gcs-web.com/static-files/f2baf981-9658-41f8-9ec0-400e9fdbde11', '2013 Week 29')).head(30)
#parseReport(('http://csx.gcs-web.com/static-files/2d4f737e-1314-46aa-9e8c-a29b40be2a2b', '2019 Week 49 AAR')).head(30)
# Compiled once: applied to every report URL. Raw string fixes the
# invalid '\d' escape in the previous non-raw literal.
_WEEK_RE = re.compile(r"(?:Week-|WK|Week_|w|week)(\d\d?)")

def getWeekFrom(u):
    """Extract the 1-2 digit week number from a report URL.

    Raises IndexError when the URL contains no recognizable week token;
    callers rely on that to skip unparseable URLs.
    """
    return _WEEK_RE.findall(u)[0]
#getWeekFrom('https://s2.q4cdn.com/859568992/files/doc_downloads/volume_trends/2014/CSX_AAR-2014-WK20_csx.pdf')
def getUrlsAndWeeks(res):
    """Turn a GetContentAssetList response into (url, 'YYYY Week NN') pairs.

    URLs whose week number cannot be recognized are silently skipped.
    """
    pairs = []
    for asset in res['GetContentAssetListResult']:
        path = asset['FilePath']
        # 2020/2021 reports dropped the per-year folder layout, so their
        # year is sniffed from the path text; older URLs keep the year as
        # the second-to-last path segment.
        if "2020" in path:
            reportYear = 2020
        elif "2021" in path:
            reportYear = 2021
        else:
            reportYear = path.split("/")[-2]
        try:
            weekNo = getWeekFrom(path)
        except IndexError:
            continue
        pairs.append((path, f"{reportYear} Week {weekNo}"))
    return pairs
# CSX investor-relations endpoint listing the weekly volume-trends PDFs.
reportListUrl = "https://investors.csx.com/Services/ContentAssetService.svc/GetContentAssetList"


def params(y):
    """Build the JSON request body asking for all 'Volume Trends' assets of year *y*.

    Previously a lambda bound to a name (PEP 8 E731); converted to a def
    with an identical call signature.
    """
    return {
        "serviceDto": {
            "ViewType": "2",
            "ViewDate": "",
            "RevisionNumber": "1",
            "LanguageId": "1",
            "Signature": "",
            "ItemCount": -1,   # -1 => no paging: return every asset
            "StartIndex": 0,
            "TagList": [],
            "IncludeTags": True
        },
        "assetType": "Volume Trends",
        "excludeSelection": 1,
        "year": y
    }
# Shared request headers: browser User-Agent (the IR site rejects default
# urllib agents) plus the JSON content type the service expects.
headers = {
    'User-Agent': uaString,
    'Content-Type': 'application/json; charset=UTF-8'
}
# Collect (url, week-label) pairs for every report year, then parse each PDF.
urls = []
for y in range(2014, 2022):
    req = Request(reportListUrl, data=dumps(params(y)).encode('ascii'), headers=headers)
    data = urlopen(req)
    res_dict = loads(data.read())
    urls = urls + getUrlsAndWeeks(res_dict)
dfs = map(parseCsxReport, urls)
# One column set per report; transpose so each row is one week's report.
merged = pd.concat(dfs, axis=1, sort=False).T
#merged.head()
# Keep rows whose label ends in a digit (labels with trailing text such as
# ' AAR' are dropped); the .replace(' AAR', '') below is a belt-and-braces
# leftover. 'Week %W %Y' + weekday 6 anchors each week on its Saturday.
subset = merged.filter(axis='index', regex='[0-9]$').copy()
subset['dt'] = subset.index.map(lambda v: pd.to_datetime(v.replace(' AAR', '') + ' 6', format='Week %W %Y %w', errors='coerce'))
# Notebook-only banner showing the refresh date.
HTML("<h4><i>(data current as of {})</i></h4>".format(date.today().strftime("%B %d, %Y")))
def doChart(metric, df, color='grey'):
    """Two-panel altair chart for one CSX metric.

    Upper panel: monthly year-over-year growth bars (12-month lag on the
    monthly resample). Lower panel: compressed raw-volume line with an
    interval brush that zooms the upper panel's x-axis.
    """
    pmp = df[['dt', metric]].sort_values('dt').copy()
    # Weekly observations -> calendar-month means.
    pmp2 = pmp.set_index('dt').resample('1M').mean().dropna().reset_index()
    brush = alt.selection(type='interval', encodings=['x'])
    base = alt.Chart(pmp2).mark_line(color='purple').encode(
        alt.X('dt:T', axis=alt.Axis(title='', format='%b %Y')),
        alt.Y(f'{metric}:Q', axis=alt.Axis(title='Volume [carloads]')),
        # NOTE(review): 'yoy' only exists after the window transform applied
        # to `upper`; on this base (and the lower panel) the tooltip field
        # has no backing data.
        tooltip=[alt.Tooltip("dt:T", format="%b %Y"), alt.Tooltip(f"yoy:Q", format=",.02f")]
    ).properties(
        width=750,
        height=450
    )
    # lag(metric, 12) over monthly rows = same month one year earlier.
    upper = base.mark_bar(color=color).transform_window(
        window=[alt.WindowFieldDef("lag", metric, param=12, **{ 'as': 'previous' })],
        sort=[{"field": "dt"}],
    ).transform_calculate(
        yoy=f"((datum['{metric}'] - datum.previous) / datum.previous) * 100"
    ).encode(
        alt.X('dt:T', axis=alt.Axis(title='', format='%b %Y'), scale=alt.Scale(domain=brush), impute=alt.ImputeParams(value=None)),
        alt.Y('yoy:Q', axis=alt.Axis(title='52-Week Growth [%]')),
        # Negative growth rendered salmon, positive in the caller's color.
        color=alt.condition("datum['yoy'] < 0",
                            alt.value('lightsalmon'),
                            alt.value(color)
        ),
        tooltip=[alt.Tooltip('dt:T', format='%B %Y', title='Period'), alt.Tooltip('yoy:Q', format=',.02f')]
    ).properties(
        title=f'CSX Freight Reports: {metric}'
    )
    # Compressed copy of the volume line acts as the brush/zoom control.
    lower = base.properties(
        height=100
    ).add_selection(brush)
    return (upper & lower).properties(
        background='white'
    )
    #return c.display()
def doYoyChart(df, metric = 'MOTOR VEHICLES', carrier="BNSF", period=52, freq='W', subset=-260, size=2, color='royalblue'):
    """Generic two-panel year-over-year chart (used for BNSF and NS data).

    df      : frame with a 'dt' column plus one column per metric
    period  : row lag used for the YoY comparison (52 weekly / 12 monthly)
    subset  : row offset into df; e.g. -260 keeps roughly the last 5 years
              of weekly rows
    NOTE(review): `freq` is only used by the commented-out pct_change
    variant below; the live code's lag is purely row-based.
    """
    #yoy = df.set_index('dt').pct_change(period, freq=freq).apply(lambda v: v * 100).sort_index().reset_index().copy()
    # .transform_calculate(
    #    val=f"datum['{metric}'] > 100 ? NaN : datum['{metric}']"
    #)
    brush = alt.selection(type='interval', encodings=['x'])
    base = alt.Chart(df[subset:]).mark_line(color='purple').encode(
        alt.X('dt:T', axis=alt.Axis(title='', format='%b %Y')),
        alt.Y(f'{metric}:Q', axis=alt.Axis(title='Volume [carloads]')),
        tooltip=[
            alt.Tooltip("dt:T", format="%b %Y"),
            alt.Tooltip(f"{metric}:Q")
        ]
    ).properties(
        width=750,
        height=450
    )
    # lag(metric, period) rows back = same period one year earlier.
    upper = base.mark_bar(size=size, color=color).transform_window(
        window=[alt.WindowFieldDef("lag", metric, param=period, **{ 'as': 'previous' })],
        sort=[{"field": "dt"}],
    ).transform_calculate(
        yoy=f"((datum['{metric}'] - datum.previous) / datum.previous) * 100"
    ).encode(
        alt.X('dt:T', axis=alt.Axis(title='', format='%b %Y'), scale=alt.Scale(domain=brush), impute=alt.ImputeParams(value=None)),
        alt.Y('yoy:Q', axis=alt.Axis(title='Year-over-year Growth [%]'), impute=alt.ImputeParams(method='value', keyvals=[100, 1000])),
        # Negative growth rendered salmon, positive in the caller's color.
        color=alt.condition(f"datum['yoy'] < 0",
                            alt.value('lightsalmon'),
                            alt.value(color)
        ),
        tooltip=[
            alt.Tooltip('dt:T', format='%b %Y', title=''),
            alt.Tooltip(f'yoy:Q', format=',.0f', title='Year-over-year Growth [%]')
        ]
    ).properties(
        title=f"{carrier} trend for: {metric}"
    )
    # Compressed volume line acting as the brush/zoom control.
    lower = base.properties(
        height=100
    ).add_selection(brush)
    return (lower & upper).properties(
        background='white'
    )
# --- CSX charts: one notebook cell per commodity; colors grouped loosely
# --- by commodity type (industrial grays, agricultural greens, etc.).
metric = 'Total Traffic'
doChart(metric, subset, color='royalblue')
metric = 'Total Carloads'
doChart(metric, subset, color='royalblue')
metric = 'Total Intermodal'
doChart(metric, subset, color='royalblue')
doChart('Primary Metal Products', subset, color='darkslategray')
metric = 'Lumber & Wood Products'
doChart(metric, subset, color='darkolivegreen')
metric = 'Motor Vehicles and Parts'
doChart(metric, subset, color='darkslategray')
metric = 'Containers'
doChart(metric, subset, color='royalblue')
metric = 'Coal'
doChart(metric, subset, color='black')
metric = 'Chemicals'
doChart(metric, subset, color='darkseagreen')
metric = 'Food Products'
doChart(metric, subset, color='darkolivegreen')
metric = 'Grain'
doChart(metric, subset, color='darkseagreen')
metric = 'Grain Mill Products'
doChart(metric, subset, color='teal')
# https://www.up.com/investor/aar-stb_reports/
from subprocess import CalledProcessError
# Scrape the Union Pacific report archive (2005-2014) for PDF links.
# NOTE: `!curl` / `$url` is IPython shell syntax -- this only runs in a
# notebook, not as a plain Python script.
upUrls = []
for y in range(2005, 2015):
    url = "'http://www.up.com/investors/reports/archive.cfm?Year={}'".format(y)
    res = !curl -L $url
    pdfs = re.findall('href="([^.]+.pdf)"', "".join(res))
    upUrls = upUrls + pdfs
@memory.cache
def parseUnionPacificArchive(u, retries=3):
    """Parse one Union Pacific weekly-carloadings PDF (2005-2014 archive).

    Parameters
    ----------
    u : str
        Site-relative PDF path, e.g. '/investors/.../2009/05.pdf'.

    Returns a Series named 'YYYY-WW' (or, for 2009, a two-column frame that
    also recovers 2008 from the prior-year column), or None on failure.
    Transient fetch/parse errors retry with exponential backoff.
    """
    if retries < 0:
        return None
    theUrl = "http://up.com" + u
    # The week number is encoded in the file name; two layouts exist.
    try:
        week = re.search(r".*[0-9]{4}/(\d\d).pdf.*", u).group(1)
    except AttributeError:
        try:
            week = re.search(r".*[0-9]{4}/week(\d\d)\_\d\d.pdf.*", u).group(1)
        except AttributeError:
            print("Failed: " + u)
            return None
    try:
        req = Request(theUrl, data=None, headers={ 'User-Agent': uaString })
        data = urlopen(req)
        df_up = read_pdf(BytesIO(data.read()), pages='all', multiple_tables=False)[0]
        originalColumns = df_up.columns.to_list()
        # First column becomes the 'YYYY-WW' label derived from the header.
        df_up.columns = [str(originalColumns[1]) + '-' + str(week)] + originalColumns[1:]
        year = str(df_up.columns[1])
        # A mis-parsed table puts a carload count (e.g. '5,123') where the
        # year header should be; drop such reports.
        if year[:2] in ['5,', '6,', '7,', '8,']:
            print(f"Invalid year for {u}")
            print(originalColumns)
            return None
        if year == '2009':
            # fetch 2008 data from the 2009 report's previous-year column
            tmp = pd.concat([df_up.set_index(df_up.columns[0])[df_up.columns[1]].rename(year + '-' + str(week)),
                             df_up.set_index(df_up.columns[0])[df_up.columns[2]].rename("2008" + '-' + str(week))], axis=1)
        else:
            tmp = df_up.set_index(df_up.columns[0])[df_up.columns[1]].rename(year + '-' + str(week))
        return tmp
    except CalledProcessError:
        # tabula's Java subprocess died; not worth retrying.
        return None
    except (KeyError, IndexError, URLError):
        # Transient parse/fetch failure: back off, then retry with the
        # original site-relative path. (The previous code re-passed the
        # absolute URL, so every retry double-prefixed 'http://up.com' and
        # could never succeed; it also duplicated this handler three times.)
        print("Failed: " + u)
        sleep(2 ** (3 - retries))
        return parseUnionPacificArchive(u, retries - 1)
#parseArchive(upUrls[1]).head()
%%capture
#%%time
# Parse every archive PDF (skipping 'changes' revision files) and merge
# into one frame: rows = 'YYYY-WW' labels, columns = commodity names.
dfs_a = []
for u in upUrls:
    if "changes" in u:
        continue
    dfs_a.append(parseUnionPacificArchive(u))
df_aone = pd.concat(dfs_a, axis=1).T
validCols = [c for c in df_aone.columns if c]
# Strip thousands separators, then coerce to numbers (NaN on failure).
df_aone[validCols] = df_aone[validCols].applymap(lambda v: str(v).replace(",", "") if not pd.isnull(v) else v)\
    .applymap(lambda v: pd.to_numeric(v, errors='coerce'))
# Normalize commodity labels: drop leading code numbers and ', N.E.C', and
# expand 'Prod.' so both UP report eras share the same column names.
cleanColumn = lambda s: re.sub("^\d+ ", "", str(s)).replace(", N.E.C", "").replace("Prod.", "Products")
df_aone.columns = [cleanColumn(c) for c in df_aone.columns]
#df_aone.columns
# Average duplicate (week, commodity) cells, then pivot back to a
# week-indexed wide table.
tab = df_aone.T.reset_index().melt(id_vars='index').groupby(['index', 'variable']).agg('mean')\
    .reset_index().pivot(index='variable', columns='index')
tab = tab.droplevel(level=0, axis=1)
tab.index = tab.index.rename('dt')
#tab.head()
%%capture
#%%time
# curl -L 'https://www.up.com/up/investor/aar-stb_reports/2015_carloadings/index.htm'
# Scrape the newer (2015+) per-year index pages for PDF links; IPython
# `!curl` shell syntax, notebook-only.
upUrls2 = []
for y in range(2015, 2021):
    url = "'https://www.up.com/up/investor/aar-stb_reports/{}_carloadings/index.htm'".format(y)
    res = !curl -L $url
    pdfs = re.findall('href="([^.]+.pdf)"', "".join(res))
    upUrls2 = upUrls2 + pdfs
# For 2021 the index page moved (capital 'Carloads'); fetch it separately.
new_url = "https://www.up.com/investor/aar-stb_reports/2021_Carloads/index.htm"
new_res = !curl -L $new_url
new_pdfs = re.findall('href="([^.]+.pdf)"', "".join(new_res))
upUrls2 = upUrls2 + new_pdfs
@memory.cache
def parseUnionPacificArchive2(url, retries=3):
    """Parse one Union Pacific weekly-carloadings PDF (2015+ layout).

    Returns a Series named 'YYYY-WW' (year taken from the PDF's second
    column header), or None when no week number can be extracted or the
    URL is invalid. Transient failures retry with exponential backoff.

    Fixed: the regex literals are now raw strings -- the previous
    non-raw '\\_' and '\\d' escapes are invalid escape sequences
    (DeprecationWarning, and a future SyntaxError).
    """
    if retries < 0:
        return None
    theUrl2 = "http://up.com" + url
    # Three file-name layouts have been observed; try each in turn.
    try:
        week = re.search(r".*_[\d]{4}-(\d\d?).pdf.*", url).group(1)
    except AttributeError:
        try:
            week = re.search(r".*week_(\d\d?)\_[\d]{4}[^.]+.pdf.*", url).group(1)
        except AttributeError:
            try:
                week = re.search(r".*_[\d]{4}_(\d\d?)[^\.]*.pdf.*", url).group(1)
            except AttributeError:
                print("Failed week: " + url)
                return None
    #print(week)
    try:
        req = Request(theUrl2, data=None, headers={ 'User-Agent': uaString })
        data = urlopen(req)
        df_up = read_pdf(BytesIO(data.read()), pages='all', multiple_tables=False)[0]
        # Label the first column 'YYYY-WW' using the year header.
        df_up.columns = [str(df_up.columns[1]) + '-' + str(week)] + df_up.columns.to_list()[1:]
        tmp = df_up.set_index(df_up.columns[0])[df_up.columns[1]].rename(str(df_up.columns[1]) + '-' + str(week))
        return tmp
    except IndexError:
        # Empty/mis-shaped table: back off and retry.
        print("Failed index: " + url)
        sleep(2 ** (3 - retries))
        return parseUnionPacificArchive2(url, retries-1)
    except HTTPError:
        # Transient HTTP error: back off and retry.
        print("Failed url: " + url)
        sleep(2 ** (3 - retries))
        return parseUnionPacificArchive2(url, retries-1)
    except InvalidURL:
        # Malformed link scraped from the index page; skip permanently.
        print("Failed url: " + url)
        return None
# parseArchive2(upUrls2[2]).head()
# Parse every 2015+ PDF (skipping 'changes' revision files) and merge.
dfs_atwo = []
for u in upUrls2:
    if "changes" in u:
        continue
    dfs_atwo.append(parseUnionPacificArchive2(u))
df_atwo = pd.concat(dfs_atwo, axis=1).T# .head()
validCols = [c for c in df_atwo.columns if c]
# Strip thousands separators, then coerce to numbers (NaN on failure).
df_atwo[validCols] = df_atwo[validCols].applymap(lambda v: str(v).replace(",", "") if not pd.isnull(v) else v)\
    .applymap(lambda v: pd.to_numeric(v, errors='coerce'))
#df_atwo.columns
# Stitch both UP eras together; 'YYYY-WW' label + weekday 6 -> Saturday date.
df_unionpac = pd.concat([tab, df_atwo])
df_unionpac['dt'] = df_unionpac.index.map(lambda v: pd.to_datetime(v+"-6", format="%Y-%W-%w", errors='coerce'))
#df_unionpac.head()
# Weekly 52-period percent change. NOTE(review): this frame appears unused
# below -- the charts compute YoY via an altair window transform instead.
df_unionpac_yoy = df_unionpac.groupby('dt').agg('mean').resample('1W').nearest().pct_change(52, freq='W').apply(lambda v: v * 100).reset_index()
#df_unionpac_yoy.tail()
def doUpChart(df, metric='Total Carloads', color='purple'):
    """Simple volume-history line chart for one Union Pacific metric.

    Fixed: the function previously ignored its `df` argument and read the
    global `df_unionpac` directly; it now charts the frame it is given.
    All existing call sites already pass `df_unionpac`, so behavior at
    those sites is unchanged.
    """
    return alt.Chart(df.reset_index()[['dt', metric]]).mark_line(color=color).encode(
        alt.X('dt:T', axis=alt.Axis(title='')),
        alt.Y('{}:Q'.format(metric))
    ).properties(
        title=f"Union Pacific Railroad: {metric} History",
        width=750,
        height=400
    )
# Headline chart: total UP carloads history.
doUpChart(df_unionpac, 'Total Carloads')
def doYoyUpChart(df, metric, color='royalblue', domain=[-30, 30]):
    """Two-panel YoY chart for one Union Pacific metric.

    Upper panel: 52-row (weekly) year-over-year growth bars clamped to
    `domain`; lower panel: compressed raw-volume line with an interval
    brush that zooms the upper panel's x-axis.
    NOTE(review): mutable default `domain=[-30, 30]` is safe here only
    because the list is never mutated.
    """
    brush = alt.selection(type='interval', encodings=['x'])
    base = alt.Chart(df).mark_line(color='purple').encode(
        alt.X('dt:T', axis=alt.Axis(title='', format='%b %Y'), impute=alt.ImputeParams(value=None)),
        alt.Y(f'{metric}:Q', axis=alt.Axis(title='Volume [carloads]'), impute=alt.ImputeParams(value=None)),
        # NOTE(review): 'yoy' only exists on `upper` after its window
        # transform; on this base chart the tooltip field has no data.
        tooltip=[alt.Tooltip("dt:T", format="%b %Y"), alt.Tooltip(f"yoy:Q", format=",.02f")]
    ).properties(
        width=750,
        height=450
    )
    # lag(metric, 52) weekly rows back = same week one year earlier.
    upper = base.mark_bar(size=1.5, color=color).transform_window(
        window=[alt.WindowFieldDef("lag", metric, param=52, **{ 'as': 'previous' })],
        sort=[{"field": "dt"}],
    ).transform_calculate(
        yoy=f"((datum['{metric}'] - datum.previous) / datum.previous) * 100"
    ).encode(
        alt.X('dt:T', axis=alt.Axis(title='', format='%b %Y'), scale=alt.Scale(domain=brush), impute=alt.ImputeParams(value=None)),
        alt.Y('yoy:Q', axis=alt.Axis(title='Year-over-year Growth [%]'), scale=alt.Scale(domain=domain)),
        # Negative growth rendered salmon, positive in the caller's color.
        color=alt.condition(f"datum['yoy'] < 0",
                            alt.value('lightsalmon'),
                            alt.value(color)
        ),
        tooltip=[alt.Tooltip('dt:T', format='%b %Y'), alt.Tooltip(f'yoy:Q', format=',.0f')]
    ).properties(
        title=f"Union Pacific Railroad: {metric} Traffic"
    )
    # Compressed volume line acting as the brush/zoom control.
    lower = base.properties(
        height=100
    ).add_selection(brush)
    return (upper & lower).properties(
        background='white'
    )
# Headline YoY chart; also saved as the image exported with this analysis.
c = doYoyUpChart(df_unionpac, 'Total Carloads')
c.save('transportation-rail.png')
c.display()
# --- Union Pacific commodity charts; YoY domains widened for the more
# --- volatile commodities.
metric = 'Chemicals'
#doUpChart(df_unionpac, metric)
doYoyUpChart(df_unionpac, metric, color='darkseagreen')
metric = 'Lumber & Wood Products'
#doUpChart(df_unionpac, metric)
doYoyUpChart(df_unionpac, metric, color='forestgreen', domain=[-60, 60])
metric = 'Iron & Steel Scrap'
#doUpChart(df_unionpac, metric)
doYoyUpChart(df_unionpac, metric, color='slategray', domain=[-80, 80])
metric = 'Metals & Products'
#doUpChart(df_unionpac, metric)
doYoyUpChart(df_unionpac, metric, color='slategray', domain=[-80, 120])
metric = 'Motor Vehicles & Equipment'
#doUpChart(df_unionpac, metric)
doYoyUpChart(df_unionpac, metric, color='black', domain=[-85, 125])
metric = 'Metallic Ores'
#doUpChart(df_unionpac, metric)
doYoyUpChart(df_unionpac, metric, color='darkslategray', domain=[-85, 170])
metric = 'Nonmetallic Minerals'
#doUpChart(df_unionpac, metric)
doYoyUpChart(df_unionpac, metric, color='gray', domain=[-65, 100])
%%time
# NOTE(review): this cell deliberately aborts -- the Norfolk Southern
# Google-search scrape below is currently broken and needs manual attention
# before the rest of the section can run.
raise RuntimeError("Broken")
numPages = 6
"""res2 = []
for page in range(1, numPages+1):
sleep(1)
res2 += googleSearch('site:nscorp.com filetype:pdf weekly merchandise car loadings', page)
#print(len(res2))
"""
# Google for NS weekly-loadings PDFs; keep only results whose description
# mentions both 'weekly merchandise' and 'car loadings'.
res2 = googleSearch('site:nscorp.com filetype:pdf weekly loadings', numPages)
links2 = [r.link for r in res2 if 'weekly merchandise' in r.description.lower() and 'car loadings' in r.description.lower()]
#len(links2)
#%%time
def handle(df_ns, dt):
    """Convert one NS car-loadings table into a date-named numeric Series.

    df_ns : raw tabula table -- first column = commodity labels, second =
            loading counts as strings (possibly with thousands separators
            and occasional trailing text).
    dt    : period-end date string, becomes the Series name.

    Fixed: the previous `except Exception` fallback referenced `df_ns2` and
    `date`, which are unbound when the failure happens before their
    assignment (latent NameError); the try now wraps only the numeric
    conversion. The local `date` also shadowed the `datetime.date` import.
    """
    period_end = pd.to_datetime(dt)
    parsed = df_ns.iloc[:, :2].copy()
    parsed.columns = ['Key', 'Loadings']
    # Drop blank rows and the header row, keyed by commodity name.
    parsed = parsed.dropna().set_index('Key').iloc[1:]
    cleaned = parsed['Loadings'].str.replace(",", "", regex=False)
    try:
        parsed[period_end] = cleaned.apply(pd.to_numeric)
    except (ValueError, TypeError):
        # Some cells carry trailing text; keep only the leading token.
        parsed[period_end] = cleaned.apply(lambda v: pd.to_numeric(v.split()[0]))
    return parsed[period_end].T
#@memory.cache
def parseNorfolkSouthern(url, retries=3):
    """
    Parse one Norfolk Southern weekly merchandise car-loadings PDF.

    Tables are read with tabula; the per-table 'To: MM-DD-YYYY' period-end
    dates are recovered from the PDF text layer via tika or, when no text
    layer exists, from each table's header row. Returns a frame with one
    column per reporting date, or None after exhausting retries / on a
    header without a recognizable date.
    """
    if retries < 0:
        return None
    location = url
    req = Request(location, data=None, headers={ 'User-Agent': uaString })
    data = urlopen(req)
    byteData = BytesIO(data.read())
    try:
        #print(f"Parsing {url}...")
        df_ns = read_pdf(byteData, pages='all')
    except Exception as e:
        # tabula fails transiently on some fetches; back off and retry.
        sleep(2 ** (3 - retries))
        return parseNorfolkSouthern(location, retries-1)
    try:
        # Rewind the buffer so tika reads from the start.
        byteData.seek(0)
        raw = parser.from_buffer(byteData)
        if raw['content']:
            # Text layer available: pull every 'To:' period-end date.
            dts = re.findall(".* To: ([\d]{2}-[\d]{2}-[\d]{4}).*", raw['content'])
        else:
            #print(type(df_ns[0]))
            # No text layer: scrape the date out of each table's header.
            dts = [re.search(".* To: ([\d]{2}-[\d]{2}-[\d]{4}).*", df.columns[1]).group(1) for df in df_ns]
        # One parsed table per reporting period; pair tables with dates.
        return pd.concat([handle(f, t) for f, t in zip(df_ns, dts)], axis=1)
    except AttributeError as e:
        # re.search returned None: header had no recognizable date.
        print(f"Failed on {url}")
        #print(e)
        #print(df_ns.columns[1], df_ns.columns)
        return None
# Parse every unique NS link, merge, and coerce all cells to floats.
dfs_ns = [parseNorfolkSouthern(l) for l in set(links2)]
#for l in list(set(links2)):
#    dfs_ns.append(parseNorfolkSouthern(l))
df_ns_merged = pd.concat(dfs_ns, axis=1).T
# df_ns_merged.to_csv("norfolk_southern.csv", sep='|')
# df_ns_merged = pd.read_csv("norfolk_southern.csv", sep="|")
df_ns_merged = df_ns_merged.applymap(float)
df_ns_merged['dt'] = df_ns_merged.index.map(pd.to_datetime)
# Weekly reports -> monthly series for charting.
df_ns_ts = df_ns_merged.groupby('dt').agg('mean').resample('1M').nearest().reset_index()
# --- Norfolk Southern commodity charts (12-month YoY on monthly data).
# --- NOTE(review): 'Northfolk' is a typo for 'Norfolk' in the chart titles.
doYoyChart(df_ns_ts, metric='Motor Vehicles and Equipment', carrier='Northfolk Southern',
           period=12, freq='M', subset=0, size=7, color='black')
doYoyChart(df_ns_ts, metric='Lumber and Wood Products', carrier='Northfolk Southern',
           period=12, freq='M', subset=0, size=7, color='forestgreen')
doYoyChart(df_ns_ts, metric='Pulp, Paper and Allied Products', carrier='Northfolk Southern',
           period=12, freq='M', subset=0, size=7, color='forestgreen')
doYoyChart(df_ns_ts, metric='Primary Forest Products', carrier='Northfolk Southern',
           period=12, freq='M', subset=0, size=7, color='forestgreen')
doYoyChart(df_ns_ts, metric='Chemicals', carrier='Northfolk Southern',
           period=12, freq='M', subset=0, size=7, color='darkseagreen')
doYoyChart(df_ns_ts, metric='Iron and Steel Scrap', carrier='Northfolk Southern',
           period=12, freq='M', subset=0, size=7, color='grey')
doYoyChart(df_ns_ts, metric='Total Carloadings', carrier='Northfolk Southern',
           period=12, freq='M', subset=0, size=7)
doYoyChart(df_ns_ts, metric='Total Intermodal', carrier='Northfolk Southern',
           period=12, freq='M', subset=0, size=7)
doYoyChart(df_ns_ts, metric='Metals and Products', carrier='Northfolk Southern',
           period=12, freq='M', subset=0, size=7, color='grey')
doYoyChart(df_ns_ts, metric='Metallic Ores', carrier='Northfolk Southern',
           period=12, freq='M', subset=0, size=7, color='black')
doYoyChart(df_ns_ts, metric='Container', carrier='Northfolk Southern',
           period=12, freq='M', subset=0, size=7)
# https://www.bnsf.com/about-bnsf/financial-information/index.html#Weekly+Carload+Reports
numPages = 11
#"""
# Page through Google results for BNSF weekly carload PDFs; the 60s sleep
# keeps the scraper under Google's rate limits.
res = []
for page in range(1, numPages+1):
    sleep(60)
    res += googleSearch("site:bnsf.com filetype:pdf weekly imodcarload", first_page=page, pages=numPages)
#"""
#res = googleSearch("site:bnsf.com filetype:pdf weekly imodcarload", pages=numPages)
links = [r.link for r in res if 'weekly' in r.name.lower() and 'imodcarload' in r.name.lower()]
links
#%%time
# NOTE(review): uaString is re-assigned here with the same value as at the
# top of the file; harmless, but redundant.
uaString = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'
#@memory.cache
def parseBnsf(link, retries=3):
    """
    Parse one BNSF weekly carload/intermodal PDF.

    The report carries both the current week (column 2) and the same week
    one year earlier (column 5); both are returned, with the prior-year
    rows stamped dt minus 52 weeks. The report date comes from the file
    name. Returns None after exhausting retries.
    """
    if retries < 0:
        # Fixed: this branch previously also did `print(e)` with `e`
        # undefined in this scope, raising NameError instead of giving up
        # cleanly after the final retry.
        print("Failed " + link)
        return None
    location = link
    try:
        # e.g. '.../2021-01-09.pdf' -> '2021-01-09'
        dt = location.split("/")[-1].split(".")[0]
        req = Request(location, data=None, headers={ 'User-Agent': uaString })
        data = urlopen(req)
        df = read_pdf(BytesIO(data.read()), pages='all')[0]
        # Current-week column.
        df_t = df.set_index(df.columns[0]).iloc[:, 2]
        df_t = df_t.dropna().str.replace(",", "").apply(pd.to_numeric)
        df_t = df_t.T
        df_t['dt'] = pd.to_datetime(dt)
        df_t['source'] = df_t.index.map(lambda v: location)
        # Prior-year column, dated 52 weeks back.
        df_to = df.set_index(df.columns[0]).iloc[:, 5]
        df_to = df_to.dropna().str.replace(",", "").apply(pd.to_numeric)
        df_to = df_to.T
        df_to['dt'] = pd.to_datetime(dt) - pd.DateOffset(weeks=52)
        return pd.concat([df_t, df_to], axis=1)
    except Exception as e:
        # Fetch/parse hiccup: back off, then retry.
        sleep(2 ** (3 - retries))
        return parseBnsf(location, retries-1)
# Fetch and merge all unique BNSF reports.
dfs = [parseBnsf(l) for l in set(links)]
df_bnsf = pd.concat(dfs, axis=1).T.reset_index() #.shape
# NOTE(review): the reset index becomes the first column and is renamed
# 'Total Intermodal' -- presumably it carries the intermodal totals row
# label from the PDF; confirm against a sample report.
df_bnsf.columns = ['Total Intermodal'] + df_bnsf.columns[1:].to_list()
use = [c for c in df_bnsf.columns if "source" not in c]
df_bnsf['Total Intermodal'] = df_bnsf['Total Intermodal'].str.replace(",", "").apply(pd.to_numeric)
# df_bnfs.to_csv("bnsf.csv", sep="|")
#df_bnsf = pd.read_csv('bnsf.csv', sep="|")
df_bnsf['dt'] = df_bnsf['dt'].apply(pd.to_datetime)
df_bnsf_ts = df_bnsf[use].groupby('dt').agg('mean').applymap(float).resample('1M').nearest().reset_index()
# Monthly means per commodity, pivoted wide for the chart helpers.
df_melted = df_bnsf[use].melt(id_vars='dt')
df_melted['value'] = df_melted['value'].apply(float)
df_melted = df_melted.groupby(['dt', 'variable']).agg('mean').reset_index() #.set_index('dt')
df_tab = df_melted.pivot_table(index='dt', columns='variable').resample('1M').mean()
# NOTE(review): this overwrites the df_bnsf_ts computed three lines above.
df_bnsf_ts = df_tab.droplevel(0, axis=1).reset_index()
# --- BNSF commodity charts (12-month YoY on monthly data).
doChart(df=df_bnsf_ts, metric='Total Intermodal', color='slategray')
doYoyChart(df_bnsf_ts, metric='LUMBER/WOOD', period=12, freq='M', color='forestgreen')
doYoyChart(df_bnsf_ts, metric='PULP/PAPER', period=12, freq='M', color='forestgreen', size=7)
doYoyChart(df_bnsf_ts, metric='FOREST PRODUCTS', period=12, freq='M', color='forestgreen', size=7)
doYoyChart(df_bnsf_ts, metric='CHEMICALS', period=12, freq='M', color='darkseagreen', size=7)
doYoyChart(df_bnsf_ts, metric='FOOD', period=12, freq='M', color='green', size=7)
doYoyChart(df_bnsf_ts, metric='Total Intermodal', period=12, freq='M', size=7)
doYoyChart(df_bnsf_ts, metric='IRON & STEEL SCRAP', period=12, freq='M', size=7, color='slategrey')
doYoyChart(df_bnsf_ts, metric='METALS', period=12, freq='M', size=7, color='black')
doYoyChart(df_bnsf_ts, metric='COAL', period=12, freq='M', size=7, color='black')
doYoyChart(df_bnsf_ts, metric='GRAIN', period=12, freq='M', size=7, color='goldenrod')